from datetime import date
import polars as pl
from hamilton_sdk.tracking import polars_stats as ps


def test_compute_stats_df():
    """Check the payload ``compute_stats_df`` returns for a mixed-dtype frame.

    The input frame has ten five-row columns (``a`` .. ``j``): integers,
    plain strings, booleans, floats, a categorical, an explicit Utf8 series,
    an Object series, two date columns and an all-null Float64 column.
    ``compute_stats_df`` is called with the frame, ``"test"`` and an empty
    dict, and its result must equal the expected payload built below.
    """
    letters = ["a", "b", "c", "d", "e"]
    iso_days = pl.Series(
        ["2024-01-01", "2024-01-03", "2024-01-06", "2024-01-10", "2024-01-14"]
    )
    # Column order matters: the expected "pos" values follow it.
    frame = pl.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "b": letters,
            "c": [True, False, True, False, True],
            "d": [1.0, 2.0, 3.0, 4.0, 5.0],
            "e": pl.Series(letters, dtype=pl.Categorical),
            "f": pl.Series(letters, dtype=pl.Utf8),
            "g": pl.Series(letters, dtype=pl.Object),
            "h": iso_days.str.to_date(),
            "i": pl.Series([None, None, None, None, None], dtype=pl.Float64),
            "j": pl.date_range(date(2022, 1, 1), date(2022, 5, 1), "1mo", eager=True),
        }
    )

    observed = ps.compute_stats_df(frame, "test", {})

    # Columns "a" and "d" hold the same values (1..5), so they are expected
    # to share histogram, quantiles, mean and std; only dtype-specific
    # fields differ.
    one_to_five_histogram = {
        "[1.0, 1.4]": 1,
        "(1.4, 1.8]": 0,
        "(1.8, 2.2]": 1,
        "(2.2, 2.6]": 0,
        "(2.6, 3.0]": 1,
        "(3.0, 3.4]": 0,
        "(3.4, 3.8]": 0,
        "(3.8, 4.2]": 1,
        "(4.2, 4.6]": 0,
        "(4.6, 5.0]": 1,
    }
    one_to_five_numeric = {
        "base_data_type": "numeric",
        "count": 5,
        "histogram": one_to_five_histogram,
        "mean": 3.0,
        "missing": 0,
        "quantiles": {0.1: 1.0, 0.25: 2.0, 0.5: 3.0, 0.75: 4.0, 0.9: 5.0},
        "std": 1.5811388300841898,
        "zeros": 0,
    }
    # Fields common to the two single-character string columns ("b", "f").
    single_char_strings = {
        "avg_str_len": 1.0,
        "base_data_type": "str",
        "count": 5,
        "data_type": "String",
        "empty": 0,
        "missing": 0,
        "std_str_len": 0.0,
    }
    # Fields common to the two date columns ("h", "j").
    date_common = {
        "base_data_type": "datetime",
        "count": 5,
        "data_type": "Date",
        "histogram": {},
        "missing": 0,
        "quantiles": {},
        "std": 0.0,
        "zeros": 0,
    }

    per_column = {
        "a": {
            **one_to_five_numeric,
            "data_type": "Int64",
            "max": 5,
            "min": 1,
            "name": "a",
            "pos": 0,
        },
        "b": {**single_char_strings, "name": "b", "pos": 1},
        "c": {
            "base_data_type": "boolean",
            "count": 5,
            "data_type": "Boolean",
            "missing": 0,
            "name": "c",
            "pos": 2,
            "zeros": 0,
        },
        "d": {
            **one_to_five_numeric,
            "data_type": "Float64",
            "max": 5.0,
            "min": 1.0,
            "name": "d",
            "pos": 3,
        },
        "e": {
            "base_data_type": "category",
            "count": 5,
            "data_type": "Categorical(ordering='physical')",
            "domain": {"a": 1, "b": 1, "c": 1, "d": 1, "e": 1},
            "empty": 0,
            "missing": 0,
            "name": "e",
            "pos": 4,
            "top_freq": 1,
            "top_value": "a",
            "unique": 5,
        },
        "f": {**single_char_strings, "name": "f", "pos": 5},
        "g": {
            "base_data_type": "unhandled",
            "count": 5,
            "data_type": "Object",
            "missing": 0,
            "name": "g",
            "pos": 6,
        },
        "h": {
            **date_common,
            "max": "2024-01-14",
            "mean": "2024-01-06T19:12:00",
            "min": "2024-01-01",
            "name": "h",
            "pos": 7,
        },
        # All-null column: every summary statistic is expected to be None.
        "i": {
            "base_data_type": "numeric",
            "count": 5,
            "data_type": "Float64",
            "histogram": {},
            "max": None,
            "mean": None,
            "min": None,
            "missing": 5,
            "name": "i",
            "pos": 8,
            "quantiles": dict.fromkeys((0.1, 0.25, 0.5, 0.75, 0.9)),
            "std": None,
            "zeros": 0,
        },
        "j": {
            **date_common,
            "max": "2022-05-01",
            "mean": "2022-03-02T00:00:00",
            "min": "2022-01-01",
            "name": "j",
            "pos": 9,
        },
    }
    expected_payload = {
        "observability_schema_version": "0.0.3",
        "observability_type": "dagworks_describe",
        "observability_value": per_column,
    }

    assert observed == expected_payload
